### Summary ####
# Input: df_f, raw CE files 
# Output: Summary statistics tables and figures in Section 2
# Outline: 
# Part 1: Summary statistics from the final sample
# 1. Table I Panel A Column I and II
# 2. Table I Panel B Column I and II
# 3. Figure III
# 4. Table II
# Part 2: Summary statistics from raw CE (all interviews of CU interviewed in May, Jun, or Jul)
# 5. Table I Panel A Column III and IV
# 6. Table I Panel B Column III and IV
# IMPORTANT: Run code out of order can lead to different results

### ******** Results from the final panel ******** ####

### Summary ####
# Input: df_f, cnt20
# Output: Summary statistics table I Column 1 and 2, Table II, and Figure III

setwd(getwd())

### Data Processing ####
## Open libraries 
library(readr) # For importing datasets
library(readxl) # For importing datasets
library(dplyr) # For data processing 
library(ggplot2) # For graphs
library(weights) # For weighted summary statistics
library(expss) # For frequency tables


## Import data sets
cnt_cl <- read.csv("cnt_cleaned.csv") %>% select(NEWID,RYYMM,REBTUSED,CHCKEFT,EIP) %>% 
  filter(RYYMM==2103|RYYMM==2104|RYYMM==2105|RYYMM==2106|RYYMM==2107|RYYMM==2108) %>% 
  rename(EIPIII=EIP)

df_f <- read.csv("df_f.csv")

## Find the list of NEWIDs in the panel that could have EIPs
df_apr <- df_f %>% filter(YYMM==2104) %>% select(ID, NEWID, FINLWT21_AVG)
df_may <- df_f %>% filter(YYMM==2105) %>% select(ID, NEWID, FINLWT21_AVG)
df_jun <- df_f %>% filter(YYMM==2106) %>% select(ID, NEWID, FINLWT21_AVG)

df_jul <- df_f %>% filter(YYMM==2107) %>% select(ID, NEWID, FINLWT21_AVG)
df_aug <- df_f %>% filter(YYMM==2108) %>% select(ID, NEWID, FINLWT21_AVG)
df_sep <- df_f %>% filter(YYMM==2109) %>% select(ID, NEWID, FINLWT21_AVG)

# Since we consider the first lag, the previous interview of the Jul CUs should also be counted 
df_jul_lag <- df_jul
df_jul_lag$NEWID <- df_jul_lag$NEWID - 1

df_aug_lag <- df_aug
df_aug_lag$NEWID <- df_aug_lag$NEWID - 1

df_sep_lag <- df_sep
df_sep_lag$NEWID <- df_sep_lag$NEWID - 1

# bind all newids together and drop repititions

NEWID_list <- bind_rows(df_apr,df_may,df_jun, df_jul,df_jul_lag,
                        df_aug,df_aug_lag,df_sep,df_sep_lag) %>% distinct(NEWID,.keep_all = TRUE)

EIPIII <- merge(NEWID_list,cnt_cl,by="NEWID")

# Checking interviews with multiple EIPs
# rb1 <- EIPIII  %>% group_by(NEWID) %>% filter( n() == 1)
# rb2 <- EIPIII  %>% group_by(NEWID) %>% filter( n() == 2)
# rb3 <- EIPIII  %>% group_by(NEWID) %>% filter( n() == 3)
# rb4 <- EIPIII  %>% group_by(NEWID) %>% filter( n() == 4)
# rb5 <- EIPIII  %>% group_by(NEWID) %>% filter( n() == 5)
# rb6 <- EIPIII  %>% group_by(NEWID) %>% filter( n() == 6)
# rb7 <- EIPIII  %>% group_by(NEWID) %>% filter( n() == 7)
# rb8 <- EIPIII  %>% group_by(NEWID) %>% filter( n() == 8)

# Aggregating to the CU-month level
# EIPIII_m <- EIPIII %>% group_by(NEWID,RYYMM) %>%
#   mutate(TEIPIII = sum(EIPIII)) %>% 
#   distinct(NEWID,RYYMM, .keep_all = TRUE) %>%
#   select(-c(EIPIII))

### Table I Panel E Column I and II ####
#### Column I ####
fre(EIPIII$RYYMM)

#### Column II ####
fre(EIPIII$RYYMM, weight=EIPIII$FINLWT21_AVG)

### Table I Panel F Column I and II ####

# Find number of recipients and number of number of non-recipients
df_f_cu <- df_f %>% distinct(ID, .keep_all=TRUE)

# These are the non-recipients 
df_f_cu_nr <- df_f_cu %>% filter(r==0)

#### Column I ####
1116/3792
length(df_f_cu_nr$ID)/length(df_f_cu$ID)

#### Column II ####
sum(df_f_cu_nr$FINLWT21_AVG)/sum(df_f_cu$FINLWT21_AVG)

### Table II ####
fre(EIPIII$CHCKEFT, weight=EIPIII$FINLWT21_AVG)
fre(EIPIII$REBTUSED, weight=EIPIII$FINLWT21_AVG)


### Table C.3 ####

# Aggregate to CU-3months level
EIPIII_Q <- EIPIII %>%
  group_by(NEWID) %>%
  mutate(EIPIII_t=sum(EIPIII)) %>%
  distinct(NEWID,.keep_all=TRUE) %>%
  ungroup()

table(cut(EIPIII_Q$EIPIII_t,breaks=c(1,1399.9, 1400.1,2799.9,2800.1,4199.9,4200.1, 5599.9,
                                     5600.1, 100000)))


df_f_cu_p <- df_f %>% group_by(ID) %>%
  mutate(EIPIII_t_total=sum(EIPIII_t))

df_f_cu_p <- df_f_cu_p %>% group_by(ID) %>%
  mutate(EIPIII_tm1_total=sum(EIPIII_tm1))

df_f_cu_p$EIPIII_total <- df_f_cu_p$EIPIII_t_total + df_f_cu_p$EIPIII_tm1_total

df_f_cu_p <- df_f_cu_p %>% distinct(ID, .keep_all=TRUE)

df_f_cu_nr_p <- df_f_cu_p %>% filter(EIPIII_total==0)

# Note that we know that there are 1148 non-recipients, so EIP = 0
1148/3982
# 0<EIP<1400
291/3982
# EIP = 1400
839/3982
# 1400 < EIP < 2800
253/3982
# EIP = 2800
751/3982
# 2800 < EIP < 4200
130/3982
# EIP = 4200
191/3982
# 4200 < EIP < 5600
63/3982
# EIP = 5600
153/3982
# EIP > 5600
163/3982

# weighted numbers 
sum(df_f_cu_nr_p$FINLWT21_AVG)
# 0<EIP<1200
EIPIII_Q_1 <- EIPIII_Q %>% filter(EIPIII_t>0 & EIPIII_t <1400)
sum(EIPIII_Q_1$FINLWT21_AVG)
# EIP = 1200
EIPIII_Q_2 <- EIPIII_Q %>% filter(EIPIII_t==1400)
sum(EIPIII_Q_2$FINLWT21_AVG)
# 1200 < EIP < 1700
EIPIII_Q_3 <- EIPIII_Q %>% filter(EIPIII_t>1400 & EIPIII_t <2800)
sum(EIPIII_Q_3$FINLWT21_AVG)
# EIP = 1700
EIPIII_Q_4 <- EIPIII_Q %>% filter(EIPIII_t==2800)
sum(EIPIII_Q_4$FINLWT21_AVG)
# 1700 < EIP < 2400
EIPIII_Q_5 <- EIPIII_Q %>% filter(EIPIII_t>2800 & EIPIII_t <4200)
sum(EIPIII_Q_5$FINLWT21_AVG)
# EIP = 2400
EIPIII_Q_6 <- EIPIII_Q %>% filter(EIPIII_t==4200)
sum(EIPIII_Q_6$FINLWT21_AVG)
# 2400 < EIP < 2900
EIPIII_Q_7 <- EIPIII_Q %>% filter(EIPIII_t>4200 & EIPIII_t <5600)
sum(EIPIII_Q_7$FINLWT21_AVG)
# EIP = 2900
EIPIII_Q_8 <- EIPIII_Q %>% filter(EIPIII_t==5600)
sum(EIPIII_Q_8$FINLWT21_AVG)
# 2900 < EIP < 3400
EIPIII_Q_9 <- EIPIII_Q %>% filter(EIPIII_t>5600)
sum(EIPIII_Q_9$FINLWT21_AVG)


sum(df_f_cu_nr_p$FINLWT21_AVG) + sum(EIPIII_Q_1$FINLWT21_AVG) + sum(EIPIII_Q_2$FINLWT21_AVG)+
  sum(EIPIII_Q_3$FINLWT21_AVG) + sum(EIPIII_Q_4$FINLWT21_AVG) + sum(EIPIII_Q_5$FINLWT21_AVG) + sum(EIPIII_Q_6$FINLWT21_AVG) + 
  sum(EIPIII_Q_7$FINLWT21_AVG) + sum(EIPIII_Q_8$FINLWT21_AVG) + sum(EIPIII_Q_9$FINLWT21_AVG)

sum(df_f_cu_nr_p$FINLWT21_AVG)/102391266
sum(EIPIII_Q_1$FINLWT21_AVG)/102391266
sum(EIPIII_Q_2$FINLWT21_AVG)/102391266
sum(EIPIII_Q_3$FINLWT21_AVG)/102391266
sum(EIPIII_Q_4$FINLWT21_AVG)/102391266
sum(EIPIII_Q_5$FINLWT21_AVG)/102391266
sum(EIPIII_Q_6$FINLWT21_AVG)/102391266
sum(EIPIII_Q_7$FINLWT21_AVG)/102391266
sum(EIPIII_Q_8$FINLWT21_AVG)/102391266
sum(EIPIII_Q_9$FINLWT21_AVG)/102391266

# average EIP 
summary(EIPIII_Q$EIPIII_t)
weighted.mean(EIPIII_Q$EIPIII_t,EIPIII_Q$FINLWT21_AVG)

sd(EIPIII_Q$EIPIII_t)



### Table C.7 ####

# Drop high income
df_f$MARITAL_t <- ifelse(df_f$MARITAL1_t == 1, 1, 0)

# For single, without kids

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 75000 & FINCBTXM_FST > 50000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 100000 & FINCBTXM_FST > 75000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 125000 & FINCBTXM_FST > 100000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 150000 & FINCBTXM_FST > 125000)
table(check$r)

# For single, with kids

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 125000 & FINCBTXM_FST > 100000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 150000 & FINCBTXM_FST > 125000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 175000 & FINCBTXM_FST > 150000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 200000 & FINCBTXM_FST > 175000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 225000 & FINCBTXM_FST > 200000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 250000 & FINCBTXM_FST > 225000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 275000 & FINCBTXM_FST > 250000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 300000 & FINCBTXM_FST > 275000)
table(check$r)

# For married couple, no kids
check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 150000 & FINCBTXM_FST > 125000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 175000 & FINCBTXM_FST > 150000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 200000 & FINCBTXM_FST > 175000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 225000 & FINCBTXM_FST > 200000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 250000 & FINCBTXM_FST > 225000)
table(check$r)

#225,000

# For married couple, with kids

check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 150000 & FINCBTXM_FST > 125000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 175000 & FINCBTXM_FST > 150000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 200000 & FINCBTXM_FST > 175000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 225000 & FINCBTXM_FST > 200000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 250000 & FINCBTXM_FST > 225000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 275000 & FINCBTXM_FST > 250000)
table(check$r)

# 225,000

# For adults, no kids

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 200000 & FINCBTXM_FST > 175000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 250000 & FINCBTXM_FST > 225000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 275000 & FINCBTXM_FST > 250000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 300000 & FINCBTXM_FST > 275000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 325000 & FINCBTXM_FST > 300000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 350000 & FINCBTXM_FST > 325000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 375000 & FINCBTXM_FST > 350000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 400000 & FINCBTXM_FST > 375000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 425000 & FINCBTXM_FST > 400000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 450000 & FINCBTXM_FST > 425000)
table(check$r)

#425,000

# For adults, with kids

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t > 0 & FINCBTXM_FST < 200000 & FINCBTXM_FST > 175000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t > 0 & FINCBTXM_FST < 225000 & FINCBTXM_FST > 200000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t > 0 & FINCBTXM_FST < 250000 & FINCBTXM_FST > 225000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t > 0 & FINCBTXM_FST < 275000 & FINCBTXM_FST > 250000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t > 0 & FINCBTXM_FST < 300000 & FINCBTXM_FST > 275000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t > 0 & FINCBTXM_FST < 325000 & FINCBTXM_FST > 300000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t > 0 & FINCBTXM_FST < 350000 & FINCBTXM_FST > 325000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t > 0 & FINCBTXM_FST < 375000 & FINCBTXM_FST > 350000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t > 0 & FINCBTXM_FST < 400000 & FINCBTXM_FST > 375000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t > 0 & FINCBTXM_FST < 425000 & FINCBTXM_FST > 400000)
table(check$r)


### ******** Results from raw CE ******** ####

### Summary ####
# Input: cnt20 and fmli193 -- fmli204
# Output: Summary statistics table I column 3 and 4

### Data Processing ####
# Import datasets
cnt <- read_excel("Raw data/cnt21.xlsx") %>% filter(CONTCODE==800) %>% 
  filter(CONTMO!=1 & CONTMO!=2)

fmli203 <- read_excel("Raw data/fmli203.xlsx")
fmli204 <- read_excel("Raw data/fmli204.xlsx")
fmli211 <- read_excel("Raw data/fmli211.xlsx")
fmli212 <- read_excel("Raw data/fmli212.xlsx")
fmli213 <- read_excel("Raw data/fmli213.xlsx")

####  Obtain interviews with rebates from cnt
cnt_rc <- cnt  %>%
  # rename CONTMO
  mutate(RYYMM = ifelse(CONTMO==3,2103,
                        ifelse(CONTMO==4,2104,
                               ifelse(CONTMO==5,2105,
                                      ifelse(CONTMO==6,2106,
                                             ifelse(CONTMO==7,2107,2108)))))) %>% 
  select(NEWID,RYYMM,CHCKEFT,REBTUSED,CONTEXPX) %>%
  # Rename CONTEXPX
  rename(EIPIII = CONTEXPX)

#### Obtain all interviews from fmli's
fmli <- bind_rows(fmli212,fmli213) %>% 
  select(NEWID,QINTRVMO) %>%
  mutate(YYMM= ifelse(QINTRVMO==4,2104,
                      ifelse(QINTRVMO==5,2105,
                             ifelse(QINTRVMO==6,2106,
                                    ifelse(QINTRVMO==7,2107,
                                           ifelse(QINTRVMO==8,2108,2109)))))) %>%
  select(-c(QINTRVMO))

fmli_flt <- fmli %>% 
  select (NEWID) %>% 
  mutate(
    RYYMM = NA,
    CHCKEFT = NA,
    REBTUSED = NA,
    EIPIII = 0
  )

#### Obtain interview without rebates 
# cnt20_nr contains all interviews without rebates reported 
cnt_nr <- fmli_flt %>% filter(!(NEWID %in% cnt_rc$NEWID))

# Merge to form a cnt20_f that contains all information about rebates
cnt_f <- rbind(cnt_rc,cnt_nr)

#### Merge df with fmli (fmli193 to fmli204)
# df now contains all EIPI information as well as other info already in fmli
df <- merge(fmli,cnt_f,by="NEWID")

####  Keep only CUs that are interviewed in April, May, and June 
df <- df %>% mutate(
  ID = substr(as.character(NEWID),1,6))

df_all <- df

apr_list <- df %>% filter(YYMM==2104) %>% select(ID)
may_list <- df %>% filter(YYMM==2105) %>% select(ID)
jun_list <- df %>% filter(YYMM==2106) %>% select(ID)

df <- df %>%
  filter(ID %in% apr_list$ID | ID %in% may_list$ID | ID %in% jun_list$ID) 

#### Obtain weights 
fmli203_wts <- fmli203 %>% select(NEWID,FINLWT21)
fmli204_wts <- fmli204 %>% select(NEWID,FINLWT21)
fmli211_wts <- fmli211 %>% select(NEWID,FINLWT21)
fmli212_wts <- fmli212 %>% select(NEWID,FINLWT21)
fmli213_wts <- fmli213 %>% select(NEWID,FINLWT21)

ID_creator <- function(fmli){
  fmli_weights_income <- fmli %>% 
    mutate(
      ID = substr(as.character(NEWID),1,6),
    ) %>% select(ID, FINLWT21)
  return(fmli_weights_income)}

fmli203_wts <- ID_creator(fmli203)
fmli204_wts <- ID_creator(fmli204)
fmli211_wts <- ID_creator(fmli211) 
fmli212_wts <- ID_creator(fmli212)
fmli213_wts <- ID_creator(fmli213) 

# Merge to obtain the weights, income, and liquidity in each interview 
wts <- merge(fmli203_wts,fmli204_wts,by="ID",all=TRUE)

wts <- wts %>% rename(
  FINLWT21_203 = FINLWT21.x, FINLWT21_204 = FINLWT21.y)

wts <- merge(wts,fmli211_wts,by="ID",all=TRUE)
wts <- wts %>% rename(
  FINLWT21_211 = FINLWT21)

wts <- merge(wts,fmli212_wts,by="ID",all=TRUE)
wts <- wts %>% rename(
  FINLWT21_212 = FINLWT21)

wts <- merge(wts,fmli213_wts,by="ID",all=TRUE)
wts <- wts %>% rename(
  FINLWT21_213 = FINLWT21)

# Average weights 
wts$FINLWT21_AVG <- rowMeans(wts[,c(
  "FINLWT21_203",
  "FINLWT21_204",
  "FINLWT21_211",
  "FINLWT21_212",
  "FINLWT21_213")], 
  na.rm=TRUE)

wts <- wts %>% select(ID,FINLWT21_AVG)

#### merge with weights 
df <- merge(df,wts,by="ID")

#### find only interviews with rebates
df_r <- df %>% filter(EIPIII>0)

#### Aggregating to the CU-month level
# df_r <- df_r %>% group_by(NEWID,RYYMM) %>%
#   mutate(TEIPIII = sum(EIPIII)) %>% 
#   distinct(NEWID,RYYMM, .keep_all = TRUE) %>%
#   select(-c(EIPIII))

### Table I Panel E Column III and IV ####

#### Column III ####
fre(df_r$RYYMM)

#### Column IV ####
fre(df_r$RYYMM,weight=df_r$FINLWT21_AVG)

### Table I Panel F Column III and IV ####

df_cu <- df %>%
  group_by(ID) %>%
  mutate(TotalEIPIII=sum(EIPIII),
         r = ifelse(TotalEIPIII>0,1,0)) %>% 
  distinct(ID,.keep_all = TRUE)

df_cu_nr <- df_cu %>% filter(r==0)

#### Column III ####
2106/5196

#### Column IV ####
sum(df_cu_nr$FINLWT21_AVG)/sum(df_cu$FINLWT21_AVG)

### Total number and dollar amount of EIPs ####
#### merge with weights 
df_all <- merge(df_all,wts,by="ID")

#### Find only interviews with rebates
df_all_r <- df_all %>% filter(EIPIII>0)

# Total number of rebates
fre(df_all_r$RYYMM,weight=df_all_r$FINLWT21_AVG)

agg_eip <- df_all_r %>% 
  group_by(RYYMM) %>% 
  summarise(eip_payments = round(sum(FINLWT21_AVG))) %>% 
  mutate(year = 2000 + floor(RYYMM/100),
         month = RYYMM - floor(RYYMM/100)*100) %>% 
  select(year,month,eip_payments)

write_csv(agg_eip,"agg_eipiii.csv")

# Total amount of payments
sum(df_all_r$EIPIII*df_all_r$FINLWT21_AVG)

